####################
#PODES balance data#
####################

# Remember the caller's working directory so it can be restored at the end of
# the script, then move into the data directory that the relative paths below
# are written against.
oldwd <- getwd()
setwd("./data")

# Snapshot the object names already present in the workspace; the cleanup at
# the end of the script removes only objects that are not in this snapshot.
old_ws <- ls()


############
#PODES 2003#
############

# Read the four PODES 2003 extracts (files A-D).
podes_2003_1 <- fread('./PODES/podes2003/PODES03A.csv')
podes_2003_2 <- fread('./PODES/podes2003/PODES03B.csv')
podes_2003_3 <- fread('./PODES/podes2003/PODES03C.csv')
podes_2003_4 <- fread('./PODES/podes2003/PODES03D.csv')

# Key every extract on the same identifier columns so the keyed joins below
# line rows up on (PROP, KAB, KEC, DESA, DRH).
setkey(podes_2003_1, PROP, KAB, KEC, DESA, DRH)
setkey(podes_2003_2, PROP, KAB, KEC, DESA, DRH)
setkey(podes_2003_3, PROP, KAB, KEC, DESA, DRH)
setkey(podes_2003_4, PROP, KAB, KEC, DESA, DRH)

# Chain the keyed joins: each step uses the table built so far as the lookup
# (`i`) into the next extract.
podes_2003 <- podes_2003_2[podes_2003_1] %>%
  podes_2003_3[.] %>%
  podes_2003_4[.]

# data.table prefixes clashing non-key columns coming from `i` with "i.".
# Drop those duplicates. The dot is escaped so that only a literal "i." prefix
# matches; an unescaped '^i.' would also drop any column that merely starts
# with the letter "i".
drop_cols <- names(podes_2003)[str_detect(names(podes_2003), '^i\\.')]
if (length(drop_cols) > 0) {
  podes_2003[, (drop_cols) := NULL]
}

# The individual extracts are no longer needed; release the memory.
rm(podes_2003_1, podes_2003_2, podes_2003_3, podes_2003_4)
gc()

# Build zero-padded string codes: kab_code = 2-digit PROP + 2-digit KAB,
# kec_code = kab_code followed by 3-digit KEC.
podes_2003[, kec_code := paste0(sprintf('%02.f', PROP), sprintf('%02.f', KAB), sprintf('%03.f', KEC))]
podes_2003[, kab_code := paste0(sprintf('%02.f', PROP), sprintf('%02.f', KAB))]



#Create variables:
# Collapse the 2003 rows to one row per (kab_code, kec_code). Counts are summed
# within the group; the two share/distance variables are weighted means.
# No na.rm is used in this section, so a missing input makes the corresponding
# group-level value NA.
#
# NOTE(review): the population total is taken as B4R402A + B4R402B. The
# original code added B4R402A to itself, which doubled one column; the
# parallel 2008 section uses R401A + R401B. Confirm against the PODES 2003
# codebook that B4R402B is the companion population count.
podes_2003 <- podes_2003[,
  list(
    # Mean of B4R402D / 100, weighted by B4R402C.
    econ_hh_ag_pct = weighted.mean(B4R402D / 100, B4R402C),
    econ_hh_in_slums = sum(B5R511B4),
    econ_hh_elec = sum(B5R501B1 + B5R501B2),
    econ_kab_support = sum(B16R1603C),
    demo_scndschl = sum(B6R601D2 + B6R601D3),
    demo_hh = sum(B4R402C),
    # Mean of B3R314, weighted by the population total.
    demo_dcapital_dist = weighted.mean(B3R314, B4R402A + B4R402B),
    # Number of rows with a positive B9R906A.
    att_bars = sum(B9R906A > 0),
    # Number of rows where B8R807 equals 1.
    relg_majority_muslim = sum(B8R807 == 1),
    relg_mosques = sum(B8R801A),
    relg_churches = sum(B8R801C + B8R801D),
    relg_madrasah = sum(B6R601H3),
    # %in% never returns NA, so missing B17R1703 counts as "not 1".
    any_violence = sum(B17R1703 %in% 1),
    # Number of input rows in the group.
    desa_count = .N,
    person_count = sum(B4R402A + B4R402B)
  ),
  by = list(kab_code, kec_code)]


############
#PODES 2008#
############

# Read the three PODES 2008 extracts.
podes_2008_1 <- fread('./PODES/podes2008/pds2008_d1_new.csv')
podes_2008_2 <- fread('./PODES/podes2008/pds2008_d2_new.csv')
podes_2008_3 <- fread('./PODES/podes2008/pds2008_d3_new.csv')

# Key all three extracts on the same identifier columns for the keyed joins.
setkey(podes_2008_1, PROP, KAB, PROVKAB, KEC, DESA, KLA)
setkey(podes_2008_2, PROP, KAB, PROVKAB, KEC, DESA, KLA)
setkey(podes_2008_3, PROP, KAB, PROVKAB, KEC, DESA, KLA)

# Chain the keyed joins: the table built so far is used as the lookup (`i`)
# into the next extract.
podes_2008 <- podes_2008_2[podes_2008_1] %>%
  podes_2008_3[.]

# The individual extracts are no longer needed; release the memory.
rm(podes_2008_1, podes_2008_2, podes_2008_3)
gc()

# Build zero-padded string codes: kab_code = 2-digit PROP + 2-digit KAB,
# kec_code = kab_code followed by 3-digit KEC.
podes_2008[, kec_code := paste0(sprintf('%02.f', PROP), sprintf('%02.f', KAB), sprintf('%03.f', KEC))]
podes_2008[, kab_code := paste0(sprintf('%02.f', PROP), sprintf('%02.f', KAB))]



#Create variables:
# Collapse the 2008 rows to one row per (kab_code, kec_code). Counts are summed
# within the group; the two share/distance variables are weighted means.
# `TRUE` is spelled out instead of `T`, because `T` is an ordinary variable
# that can be reassigned.
#
# NOTE(review): na.rm is applied to some aggregates but not to demo_hh,
# relg_mosques, relg_churches, relg_madrasah or person_count, so a missing
# input still makes those NA. weighted.mean(na.rm = TRUE) only drops missing
# values of x, not of the weights. Left unchanged here; confirm whether the
# asymmetry is intended.
podes_2008 <- podes_2008[,
  list(
    # Mean of R401D / 100, weighted by R401C.
    econ_hh_ag_pct = weighted.mean(R401D / 100, R401C, na.rm = TRUE),
    econ_hh_in_slums = sum(R509B3, na.rm = TRUE),
    econ_hh_elec = sum(R501B1, na.rm = TRUE) + sum(R501B2, na.rm = TRUE),
    econ_kab_support = sum(R13012A_3, na.rm = TRUE),
    demo_scndschl = sum(R601D_2, na.rm = TRUE) + sum(R601D_3, na.rm = TRUE),
    demo_hh = sum(R401C),
    # Mean of R9021_2, weighted by R401A + R401B.
    demo_dcapital_dist = weighted.mean(R9021_2, R401A + R401B, na.rm = TRUE),
    # Number of rows where R802A equals 1.
    att_bars = sum(R802A == 1, na.rm = TRUE),
    # Number of rows where either R701 or R702 equals 1.
    relg_majority_muslim = sum((R701 %in% 1 | R702 %in% 1), na.rm = TRUE),
    relg_mosques = sum(R703A),
    relg_churches = sum(R703C + R703D),
    relg_madrasah = sum(R601H + R601I),
    # %in% never returns NA, so missing R1201A counts as "not 1".
    any_violence = sum(R1201A %in% 1),
    # Number of input rows in the group.
    desa_count = .N,
    person_count = sum(R401A + R401B)
  ),
  by = list(kab_code, kec_code)]



###################
#Collapse to dapil#
###################
# 2004 cycle: attach the election-year kecamatan id to each PODES 2003 row.
# After renaming, `kec_code` holds the crosswalk's target_kecamatan and
# `kecamatan_2004` holds its from_kecamatan.
crosswalk_2003 <- fread('./crosswalks/kecamatan_dprd_2004_to_podes2003.csv')
setnames(crosswalk_2003,
         old = c('target_kecamatan', 'from_kecamatan'),
         new = c('kec_code', 'kecamatan_2004'))
# The PODES kec_code is a character string, so the join key must be too.
crosswalk_2003[, kec_code := as.character(kec_code)]

# 2009 cycle: same preparation against PODES 2008.
crosswalk_2008 <- fread('./crosswalks/kecamatan_dprd_2009_to_podes2008.csv')
setnames(crosswalk_2008,
         old = c('target_kecamatan', 'from_kecamatan'),
         new = c('kec_code', 'kecamatan_2009'))
crosswalk_2008[, kec_code := as.character(kec_code)]

# Key both sides of each join on kec_code.
setkey(podes_2003, kec_code)
setkey(crosswalk_2003, kec_code)
setkey(podes_2008, kec_code)
setkey(crosswalk_2008, kec_code)

# Look every PODES row up in its crosswalk. The PODES table is the `i`
# argument, so all of its rows are retained in the result.
podes_2003 <- crosswalk_2003[podes_2003]
podes_2008 <- crosswalk_2008[podes_2008]

#Load kecamatan to DPRD crosswalks
##################################

#2004
kec_to_dprd_2004 <- fread("./crosswalks/kecamatan_to_dprd2_2004.csv")
setnames(kec_to_dprd_2004, old = 'DAPIL.NUMBER', new = "DAPIL_NUMBER")

# Recode KECA values of 1 to 10 before the id is assembled.
# NOTE(review): the reason for this recode is not visible here; verify
# against the source crosswalk.
kec_to_dprd_2004[KECA %in% 1, KECA := 10]

# id_kec = PROP (as-is) + 2-digit KABU + 3-digit KECA;
# dapil  = kabupaten name and dapil number separated by a space.
kec_to_dprd_2004[, `:=`(
  id_kec = paste0(PROP, sprintf("%02.f", as.numeric(KABU)), sprintf("%03.f", as.numeric(KECA))),
  dapil = paste(KAB_NAME, DAPIL_NUMBER)
)]

# Keep only rows that have a dapil number and reduce to the columns used
# downstream, under the same names as the 2009 crosswalk.
kec_to_dprd_2004 <- kec_to_dprd_2004[
  !is.na(DAPIL_NUMBER),
  .(provinsi = PROV_NAME,
    id_prov = PROP,
    kabupaten = KAB_NAME,
    id_kab = KAB_CODE,
    id_kec = as.numeric(id_kec),
    dapil)
]

# Rows labelled "IRIAN JAYA BARAT" get province id 91.
kec_to_dprd_2004[provinsi %in% "IRIAN JAYA BARAT", id_prov := 91]


#2009
kec_to_dprd_2009 <- fread("./crosswalks/kecamatan_to_dprd2_2009.csv")

# Select and rename to the same column layout as the 2004 crosswalk.
kec_to_dprd_2009 <- kec_to_dprd_2009[
  ,
  .(provinsi = provinsi,
    id_prov = provno,
    kabupaten = kabkot,
    id_kab = id_kab,
    id_kec = id_kec,
    dapil = label)
]

#Merge in dapil codes
#####################

# 2004 cycle: join on the election-year kecamatan id. The DPRD crosswalk is
# the `i` argument, so every crosswalk row is retained; rows without a PODES
# match carry NA in the PODES columns.
setkey(podes_2003, kecamatan_2004)
setkey(kec_to_dprd_2004, id_kec)
podes_2003 <- podes_2003[kec_to_dprd_2004]

# 2009 cycle: same join.
setkey(podes_2008, kecamatan_2009)
setkey(kec_to_dprd_2009, id_kec)
podes_2008 <- podes_2008[kec_to_dprd_2009]

#Drop duplicate pre_treatment kecamatan
# `drop` flags the second and later occurrences of a kec_code within one
# (id_kab, dapil) group. `kec_use_count` is the number of rows in the whole
# table that share that kec_code.
podes_2003[, drop := duplicated(kec_code), by = .(id_kab, dapil)]
podes_2003[, kec_use_count := .N, by = .(kec_code)]

podes_2008[, drop := duplicated(kec_code), by = .(id_kab, dapil)]
podes_2008[, kec_use_count := .N, by = .(kec_code)]

#Collapse by dapil

# Aggregate kecamatan-level rows to one row per dapil.
#
# The 2003 and 2008 tables share the same column layout at this point, so the
# same aggregation is applied to both through this helper instead of two
# duplicated copies of the expression.
#
# dt: a data.table with the kecamatan-level variables, the `drop` and
#     `kec_use_count` flags, and the dapil identifier columns used in `by`.
# Returns a new data.table with one row per
#     (provinsi, id_prov, kabupaten, id_kab, dapil).
collapse_to_dapil <- function(dt) {
  # Rows flagged as within-dapil duplicates and rows with a missing kec_code
  # are excluded before aggregating.
  dt[!(drop) & !is.na(kec_code),
     list(
       # Shares/distances: re-weight the kecamatan means by their own bases.
       econ_hh_ag_pct = weighted.mean(econ_hh_ag_pct, demo_hh),
       econ_hh_in_slums = sum(econ_hh_in_slums),
       econ_hh_elec = sum(econ_hh_elec),
       econ_kab_support = sum(econ_kab_support),
       demo_scndschl = sum(demo_scndschl),
       demo_hh = sum(demo_hh),
       demo_dcapital_dist = weighted.mean(demo_dcapital_dist, person_count),
       att_bars = sum(att_bars),
       relg_majority_muslim = sum(relg_majority_muslim),
       relg_mosques = sum(relg_mosques),
       relg_churches = sum(relg_churches),
       relg_madrasah = sum(relg_madrasah),
       desa_count = sum(desa_count),
       person_count = sum(person_count),
       any_violence = sum(any_violence),
       # Number of distinct kecamatan in the dapil.
       u_kec = length(unique(kec_code)),
       # Number of rows whose kec_code appears more than once in the table.
       u_kec_rep = length(kec_code[kec_use_count > 1])
     ),
     by = list(provinsi, id_prov, kabupaten, id_kab, dapil)]
}

podes_2003 <- collapse_to_dapil(podes_2003)
podes_2008 <- collapse_to_dapil(podes_2008)

#Combine data:
# Tag each table with its election cycle and stack them, matching columns by
# name.
podes_2003[, election_cycle := 2004]
podes_2008[, election_cycle := 2009]

podes_dapil_balance <- rbindlist(list(podes_2003, podes_2008), use.names = TRUE)

#Create variables:
# Derived ratios. Each right-hand side uses only columns that already exist
# before this call, so they are assigned together in one step. The original
# assigned econ_hh_elec_pct twice with the same expression; the redundant
# assignment is removed.
podes_dapil_balance[, `:=`(
  # Per-household shares.
  econ_hh_in_slums_pct = econ_hh_in_slums / demo_hh,
  econ_hh_elec_pct = econ_hh_elec / demo_hh,
  # Per-person and per-10,000-person rates.
  econ_kab_support_per_cap = econ_kab_support / person_count,
  demo_scndschl_per_10k = demo_scndschl / person_count * 10000,
  # Shares of desa (rows counted in desa_count).
  att_bars_pct = att_bars / desa_count,
  relg_majority_muslim_pct = relg_majority_muslim / desa_count,
  relg_mosques_per_10k = relg_mosques / person_count * 10000,
  relg_churches_per_10k = relg_churches / person_count * 10000,
  relg_madrasah_per_10k = relg_madrasah / person_count * 10000,
  # u_kec_rep relative to the number of distinct kecamatan in the dapil.
  frac_kec_shared = u_kec_rep / u_kec,
  pct_desa_violence = any_violence / desa_count
)]

#########################
#Merge in dapil clusters#
#########################
dapil_clusters <- fread('./crosswalks/dapil_clusters.csv')

# Key both tables on the dapil identifier so the join matches on
# (election_cycle, id_kab, dapil).
setkey(dapil_clusters, election_cycle, id_kab, dapil)
setkey(podes_dapil_balance, election_cycle, id_kab, dapil)

# The balance table is the `i` argument, so every dapil row is retained;
# rows without a match get NA in the cluster-file columns.
podes_dapil_balance <- dapil_clusters[podes_dapil_balance]

# Give every dapil without a cluster its own negative id (-1, -2, ...).
# seq_len() is used instead of `-1:-.N` because the latter evaluates to
# c(-1, 0) when no row is missing a cluster (.N == 0), whereas
# -seq_len(0) is an empty integer vector.
podes_dapil_balance[is.na(cluster), cluster := -seq_len(.N)]

# Remove every object created by this script except the final result. The
# name list is passed straight to rm() rather than stored in a variable: the
# original kept it in `drop`, which was created after ls() was evaluated and
# so was left behind in the workspace (overwriting any existing `drop`).
# `oldwd` is part of the `old_ws` snapshot, so it is still available below.
rm(list = setdiff(ls(), c(old_ws, 'podes_dapil_balance')))

# Restore the working directory that was active when the script started.
setwd(oldwd)